#######################################
#Quasi-Poisson Models with Simulations
#######################################

rm(list = ls())
setwd() #Set Working Directory

# Packages
pacman::p_load(rtweet,
               ggplot2,
               purrrlyr,
               dplyr,
               readxl,
               writexl,
               broom,
               tidyverse,
               dotwhisker,
               lubridate,
               stargazer,
               ggpubr, 
               jtools,
               rpart,
               caret,
               reshape2,
               ipred,
               RColorBrewer,
               rpart.plot,
               MASS,
               randomForest,
               dplyr,
               car,
               vtable,
               effects,
               interactions,
               doParallel,
               foreach
               
)


#Import data
df <- readRDS("df.RDS")
##########
##########
#QP Models
##########
##########

# Creating wartime variable 
df %>%
  mutate(War = created_at > "2022-02-24") -> df

# coercing for formatting 
df$War <- ifelse(df$War == "TRUE", 1, 0)

df$user_id <- as.factor(df$user_id)  
df$conspir.po <- as.factor(df$conspir.po)
df$conspir.collusion <- as.factor(df$conspir.collusion)
df$conspir.rus <- as.factor(df$conspir.rus)
df$conspir.tusk <- as.factor(df$conspir.tusk)
df$month <- as.factor(df$month)
set.seed(611)
#########
#########
# Models
#########
#########
##########
#Likes
##########

m.conspir.po.fav <- glm(favorite_count ~ conspir.po*War + log_follow + 
                          log_friends + verified + month, 
                        family = "quasipoisson",
                        data = df)
m.conspir.tusk.fav <- glm(favorite_count ~ conspir.tusk*War + log_follow + 
                            log_friends + verified + month, 
                          family = "quasipoisson",
                          data = df)
m.conspir.rus.fav <- glm(favorite_count ~ conspir.rus*War + log_follow + 
                           log_friends + verified + month, 
                         family = "quasipoisson",
                         data = df)
m.conspir.collusion.fav <- glm(favorite_count ~ conspir.collusion*War + log_follow + 
                                 log_friends + verified + month, 
                               family = "quasipoisson",
                               data = df)



##########
#Retweets
##########
m.conspir.po.rt <- glm(retweet_count ~ conspir.po*War + log_follow + 
                         log_friends + verified + month, 
                       family = "quasipoisson",
                       data = df)

m.conspir.tusk.rt <- glm(retweet_count ~ conspir.tusk*War  + log_follow + 
                           log_friends + verified + month, 
                         family = "quasipoisson",
                         data = df)

m.conspir.rus.rt <- glm(retweet_count ~ conspir.rus*War + log_follow + 
                          log_friends + verified + month, 
                        family = "quasipoisson",
                        data = df)

m.conspir.collusion.rt <- glm(retweet_count ~ conspir.collusion*War  + log_follow + 
                                log_friends + verified + month, 
                              family = "quasipoisson",
                              data = df)


###############
#Bootstrapping 
##############
#Create function for bootstrapping
boot_pi <- function(model, pdata, n, p) {
  odata <- model$data
  lp <- (1 - p) / 2
  up <- 1 - lp
  set.seed(2016)
  seeds <- round(runif(n, 1, 1000), 0)
  boot_y <- foreach(i = 1:n, .combine = rbind) %dopar% {
    set.seed(seeds[i])
    bdata <- odata[sample(seq(nrow(odata)), size = nrow(odata), replace = TRUE), ]
    bpred <- predict(update(model, data = bdata), type = "response", newdata = pdata)
    rpois(length(bpred), lambda = bpred)
  }
  boot_ci <- t(apply(boot_y, 2, quantile, c(lp, up)))
  return(data.frame(pred = predict(model, newdata = pdata, type = "response"), lower = boot_ci[, 1], upper = boot_ci[, 2]))
}
###
#PO
###
sim.df <- data.frame(conspir.po = sample(rep(c(0,1,0,1), length.out = 250)),
                     War = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))

sim.df$conspir.po <- as.factor(sim.df$conspir.po)
sim.df$month <- as.factor(sim.df$month) 

preds <-  predict.glm(m.conspir.po.fav, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.po.fav, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.po, War) %>%
                          summarize(across(pred:upper, mean)))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))

#Calculate differences
po.fav.diff <- as.data.frame(rbind(c(preds3$pred2[3] - preds3$pred2[1], 
                                     preds3$upper2[3] - preds3$upper2[1], 
                                     preds3$lower2[3] - preds3$lower2[1],
                                     0), #0 for War no
                                   c(preds3$pred2[4] - preds3$pred2[2], 
                                     preds3$upper2[4] - preds3$upper2[2], 
                                     preds3$lower2[4] - preds3$lower2[2],
                                     1)) )#1 for War yes



#Tusk
sim.df <- data.frame(conspir.tusk = sample(rep(c(0,1,0,1), length.out = 250)),
                     War = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))

sim.df$conspir.tusk <- as.factor(sim.df$conspir.tusk)
sim.df$month <- as.factor(sim.df$month)

preds <-  predict.glm(m.conspir.tusk.fav, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.tusk.fav, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.tusk, War) %>%
                          summarize(across(pred:upper, mean)))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Calculate differences
tusk.fav.diff <- as.data.frame(rbind(c(preds3$pred2[3] - preds3$pred2[1], 
                                       preds3$upper2[3] - preds3$upper2[1], 
                                       preds3$lower2[3] - preds3$lower2[1],
                                       0), #0 for War no
                                     c(preds3$pred2[4] - preds3$pred2[2], 
                                       preds3$upper2[4] - preds3$upper2[2], 
                                       preds3$lower2[4] - preds3$lower2[2],
                                       1)) )#1 for War yes

#Russia
#Note: In our original submission, we included analyses for the CT that Russia
#or Putin caused the plane to crash. We maintain the analysis below for full
#transparency and to fully replicate the simulation seeds for all subsequent models.
sim.df <- data.frame(conspir.rus = sample(rep(c(0,1,0,1), length.out = 250)),
                     War = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))

sim.df$conspir.rus <- as.factor(sim.df$conspir.rus)
sim.df$month <- as.factor(sim.df$month) 


preds <-  predict.glm(m.conspir.rus.fav, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.rus.fav, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.rus, War) %>%
                          summarize(across(pred:upper, mean)))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))

#Calculate differences
rus.fav.diff <- as.data.frame(rbind(c(preds3$pred2[3] - preds3$pred2[1], 
                                      preds3$upper2[3] - preds3$upper2[1], 
                                      preds3$lower2[3] - preds3$lower2[1],
                                      0), #0 for War no
                                    c(preds3$pred2[4] - preds3$pred2[2], 
                                      preds3$upper2[4] - preds3$upper2[2], 
                                      preds3$lower2[4] - preds3$lower2[2],
                                      1)) )#1 for War yes

#Collusion
sim.df <- data.frame(conspir.collusion = sample(rep(c(0,1,0,1), length.out = 250)),
                     War = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))

sim.df$conspir.collusion <- as.factor(sim.df$conspir.collusion)
sim.df$month <- as.factor(sim.df$month)  

preds <-  predict.glm(m.conspir.collusion.fav, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.collusion.fav, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.collusion, War) %>%
                          summarize(across(pred:upper, mean)))

preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Calculate differences
col.fav.diff <- as.data.frame(rbind(c(preds3$pred2[3] - preds3$pred2[1], 
                                      preds3$upper2[3] - preds3$upper2[1], 
                                      preds3$lower2[3] - preds3$lower2[1],
                                      0), #0 for War no
                                    c(preds3$pred2[4] - preds3$pred2[2], 
                                      preds3$upper2[4] - preds3$upper2[2], 
                                      preds3$lower2[4] - preds3$lower2[2],
                                      1)) )#1 for War yes


#############
#Combine Favs
#############
pred_diffs <- as.data.frame(rbind(po.fav.diff, tusk.fav.diff, col.fav.diff))
colnames(pred_diffs) <- c("estimate", "UCI", "LCI", "War")
pred_diffs$model <- c("PO", "PO", "Tusk", "Tusk", "Collusion", "Collusion")
pred_diffs$War <- as.factor(pred_diffs$War)
pred_diffs_fav <- pred_diffs

##########################

##### Retweets ####

##########
#Retweets
##########

###
#PO
###
sim.df <- data.frame(conspir.po = sample(rep(c(0,1,0,1), length.out = 250)),
                     War = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))

sim.df$conspir.po <- as.factor(sim.df$conspir.po)
sim.df$month <- as.factor(sim.df$month) 

preds <-  predict.glm(m.conspir.po.rt, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.po.rt, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.po, War) %>%
                          summarize(across(pred:upper, mean)))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))

#Calculate differences
po.rt.diff <- as.data.frame(rbind(c( preds3$pred2[3] - preds3$pred2[1],
                                     preds3$lower2[3] - preds3$lower2[1],
                                     preds3$upper2[3] - preds3$upper2[1],  
                                     0), #0 for War no
                                  c(preds3$pred2[4] - preds3$pred2[2], 
                                    preds3$upper2[4] - preds3$upper2[2], 
                                    preds3$lower2[4] - preds3$lower2[2],
                                    1)) )#1 for War yes



#Tusk
sim.df <- data.frame(conspir.tusk = sample(rep(c(0,1,0,1), length.out = 250)),
                     War = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))

sim.df$conspir.tusk <- as.factor(sim.df$conspir.tusk)
sim.df$month <- as.factor(sim.df$month)

preds <-  predict.glm(m.conspir.tusk.rt, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.tusk.rt, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.tusk, War) %>%
                          summarize(across(pred:upper, mean)))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Calculate differences
tusk.rt.diff <- as.data.frame(rbind(c(preds3$pred2[3] - preds3$pred2[1], 
                                      preds3$upper2[3] - preds3$upper2[1], 
                                      preds3$lower2[3] - preds3$lower2[1],
                                      0), #0 for War no
                                    c(preds3$pred2[4] - preds3$pred2[2], 
                                      preds3$upper2[4] - preds3$upper2[2], 
                                      preds3$lower2[4] - preds3$lower2[2],
                                      1)) )#1 for War yes

#Russia
sim.df <- data.frame(conspir.rus = sample(rep(c(0,1,0,1), length.out = 250)),
                     War = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))

sim.df$conspir.rus <- as.factor(sim.df$conspir.rus)
sim.df$month <- as.factor(sim.df$month) 


preds <-  predict.glm(m.conspir.rus.rt, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.rus.rt, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.rus, War) %>%
                          summarize(across(pred:upper, mean)))
preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))

#Calculate differences
rus.rt.diff <- as.data.frame(rbind(c(preds3$pred2[3] - preds3$pred2[1], 
                                     preds3$upper2[3] - preds3$upper2[1], 
                                     preds3$lower2[3] - preds3$lower2[1],
                                     0), #0 for War no
                                   c(preds3$pred2[4] - preds3$pred2[2], 
                                     preds3$upper2[4] - preds3$upper2[2], 
                                     preds3$lower2[4] - preds3$lower2[2],
                                     1)) )#1 for War yes

#Collusion
sim.df <- data.frame(conspir.collusion = sample(rep(c(0,1,0,1), length.out = 250)),
                     War = sample(rep(c(0,1,0,1), length.out = 250)),
                     log_follow = sample(seq(min(df$log_follow, na.rm = T), 
                                             max(df$log_follow, na.rm = T),
                                             length.out = 500)),
                     log_friends = sample(seq(min(df$log_friends, na.rm = T), 
                                              max(df$log_friends, na.rm = T),
                                              length.out = 500)),
                     verified = sample(c(rep(TRUE, 33), rep(FALSE, 467))),
                     month = c(rep(1:3,length.out = 42), 
                               rep(5:12, length.out = 112),
                               rep(4, length.out = 346)))

sim.df$conspir.collusion <- as.factor(sim.df$conspir.collusion)
sim.df$month <- as.factor(sim.df$month)  

preds <-  predict.glm(m.conspir.collusion.rt, sim.df, type = "response", se.fit = F)
preds2 <- cbind(sim.df, boot_pi(m.conspir.collusion.rt, sim.df, 1000, 0.95))
preds3 <- as.data.frame(preds2 %>% group_by(conspir.collusion, War) %>%
                          summarize(across(pred:upper, mean)))

preds3$pred2 <- exp(log(preds3$pred))
preds3$upper2 <- exp(log(preds3$upper))
preds3$lower2 <- exp(log(preds3$lower))
#Calculate differences
col.rt.diff <- as.data.frame(rbind(c(preds3$pred2[3] - preds3$pred2[1], 
                                     preds3$upper2[3] - preds3$upper2[1], 
                                     preds3$lower2[3] - preds3$lower2[1],
                                     0), #0 for War no
                                   c(preds3$pred2[4] - preds3$pred2[2], 
                                     preds3$upper2[4] - preds3$upper2[2], 
                                     preds3$lower2[4] - preds3$lower2[2],
                                     1)) )#1 for War yes


#############
#Combine RTs
#############
pred_diffs <- as.data.frame(rbind(po.rt.diff, tusk.rt.diff, col.rt.diff))
colnames(pred_diffs) <- c("estimate", "UCI", "LCI", "War")
pred_diffs$model <- c("PO", "PO", "Tusk", "Tusk", "Collusion", "Collusion")
pred_diffs$War <- as.factor(pred_diffs$War)
pred_diffs_rt <- pred_diffs


### Now for one image

pred_diffs_rt$Measure <- c("Retweets")
pred_diffs_fav$Measure <- c("Likes")
pred_diffs_war_together <- rbind(pred_diffs_rt, pred_diffs_fav)
preds_diffs_war_together <- pred_diffs_war_together

### plot of differences 
pd <- position_dodge(width = 0.4)

ggplot(preds_diffs_war_together, aes(x = Measure,
                                     y = estimate, color = factor(War),
                                     #   shape = factor(War)
)) + 
  geom_point(position = pd, size = 1.5) + 
  facet_wrap(~model, ncol = 1) +
  coord_flip() +
  geom_errorbar(aes(ymin = LCI, ymax = UCI), width = .2, position = pd, size = .8) +
  labs(y = "Difference in the Predicted Level \n of Engagement When Content Included",
       x = "Measure",
       #  shape = "War?",
       # linetype = "War?"
  )+  
  theme_classic(base_size=20) +
  theme(legend.position = "bottom") +
  scale_color_grey(start = .5, end = .2, name = "War?", 
                   labels = c("Before February 24", "After February 24"),
                   guide = guide_legend(reverse = TRUE)
  )+
  #  scale_shape_discrete(labels = c("Before February 24", "After February 24"),
  #        guide = guide_legend(reverse = TRUE)) +
  ylim(-43, 43)  +
  geom_hline(aes(yintercept = 0), color = "blue", linetype = "dotted") -> together

together
